Load libraries
library("ggplot2")
library("tidyverse")
library("betareg")
library("emmeans")
library("dplyr")
Import the data
raw_data <- read.csv("5-Developmental_progression_and_size/Input/cell_cleavage.csv" , header = TRUE, sep = ",")
Change sample_ID and Treatment to Factor variables
raw_data$sample_ID <- as.factor(raw_data$sample_ID)
str(raw_data$sample_ID)
## Factor w/ 9 levels "144","145","146",..: 1 2 3 4 5 6 7 8 9
raw_data$treatment <- as.factor(raw_data$treatment)
str(raw_data$treatment)
## Factor w/ 3 levels "AMB","l","xl": 1 1 2 3 3 2 1 2 3
Calculate total cells per sample
total_counts_per_sample <- raw_data %>%
mutate(total_counts_per_sample = One_cell+Two_cell+Four_cell+Greater_than_4_cell) %>%
arrange(treatment)
show(total_counts_per_sample)
## sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1 144 AMB 72 31 32 18
## 2 145 AMB 47 35 37 92
## 3 150 AMB 220 5 8 4
## 4 146 l 80 85 65 18
## 5 149 l 43 37 9 2
## 6 151 l 124 85 58 15
## 7 147 xl 18 17 34 13
## 8 148 xl 98 58 14 2
## 9 152 xl 91 95 41 10
## total_counts_per_sample
## 1 153
## 2 211
## 3 237
## 4 248
## 5 91
## 6 282
## 7 82
## 8 172
## 9 237
Calculate % at each stage
normalized_cleavage_counts <- total_counts_per_sample %>%
mutate(percent_one_cell = (One_cell/total_counts_per_sample)) %>%
mutate(percent_two_cell = (Two_cell/total_counts_per_sample)) %>%
mutate(percent_four_cell = (Four_cell/total_counts_per_sample)) %>%
mutate(percent_greater_than_four_cell = (Greater_than_4_cell/total_counts_per_sample))
show(normalized_cleavage_counts)
## sample_ID treatment One_cell Two_cell Four_cell Greater_than_4_cell
## 1 144 AMB 72 31 32 18
## 2 145 AMB 47 35 37 92
## 3 150 AMB 220 5 8 4
## 4 146 l 80 85 65 18
## 5 149 l 43 37 9 2
## 6 151 l 124 85 58 15
## 7 147 xl 18 17 34 13
## 8 148 xl 98 58 14 2
## 9 152 xl 91 95 41 10
## total_counts_per_sample percent_one_cell percent_two_cell percent_four_cell
## 1 153 0.4705882 0.20261438 0.20915033
## 2 211 0.2227488 0.16587678 0.17535545
## 3 237 0.9282700 0.02109705 0.03375527
## 4 248 0.3225806 0.34274194 0.26209677
## 5 91 0.4725275 0.40659341 0.09890110
## 6 282 0.4397163 0.30141844 0.20567376
## 7 82 0.2195122 0.20731707 0.41463415
## 8 172 0.5697674 0.33720930 0.08139535
## 9 237 0.3839662 0.40084388 0.17299578
## percent_greater_than_four_cell
## 1 0.11764706
## 2 0.43601896
## 3 0.01687764
## 4 0.07258065
## 5 0.02197802
## 6 0.05319149
## 7 0.15853659
## 8 0.01162791
## 9 0.04219409
str(normalized_cleavage_counts)
## 'data.frame': 9 obs. of 11 variables:
## $ sample_ID : Factor w/ 9 levels "144","145","146",..: 1 2 7 3 6 8 4 5 9
## $ treatment : Factor w/ 3 levels "AMB","l","xl": 1 1 1 2 2 2 3 3 3
## $ One_cell : int 72 47 220 80 43 124 18 98 91
## $ Two_cell : int 31 35 5 85 37 85 17 58 95
## $ Four_cell : int 32 37 8 65 9 58 34 14 41
## $ Greater_than_4_cell : int 18 92 4 18 2 15 13 2 10
## $ total_counts_per_sample : int 153 211 237 248 91 282 82 172 237
## $ percent_one_cell : num 0.471 0.223 0.928 0.323 0.473 ...
## $ percent_two_cell : num 0.2026 0.1659 0.0211 0.3427 0.4066 ...
## $ percent_four_cell : num 0.2092 0.1754 0.0338 0.2621 0.0989 ...
## $ percent_greater_than_four_cell: num 0.1176 0.436 0.0169 0.0726 0.022 ...
gathered_cleavage_counts <- normalized_cleavage_counts %>%
select(sample_ID, treatment, percent_one_cell:percent_greater_than_four_cell) %>%
gather(cleavage_stage, proportion, -sample_ID, -treatment, convert = FALSE, na.rm = TRUE, factor_key = TRUE)
show(gathered_cleavage_counts)
## sample_ID treatment cleavage_stage proportion
## 1 144 AMB percent_one_cell 0.47058824
## 2 145 AMB percent_one_cell 0.22274882
## 3 150 AMB percent_one_cell 0.92827004
## 4 146 l percent_one_cell 0.32258065
## 5 149 l percent_one_cell 0.47252747
## 6 151 l percent_one_cell 0.43971631
## 7 147 xl percent_one_cell 0.21951220
## 8 148 xl percent_one_cell 0.56976744
## 9 152 xl percent_one_cell 0.38396624
## 10 144 AMB percent_two_cell 0.20261438
## 11 145 AMB percent_two_cell 0.16587678
## 12 150 AMB percent_two_cell 0.02109705
## 13 146 l percent_two_cell 0.34274194
## 14 149 l percent_two_cell 0.40659341
## 15 151 l percent_two_cell 0.30141844
## 16 147 xl percent_two_cell 0.20731707
## 17 148 xl percent_two_cell 0.33720930
## 18 152 xl percent_two_cell 0.40084388
## 19 144 AMB percent_four_cell 0.20915033
## 20 145 AMB percent_four_cell 0.17535545
## 21 150 AMB percent_four_cell 0.03375527
## 22 146 l percent_four_cell 0.26209677
## 23 149 l percent_four_cell 0.09890110
## 24 151 l percent_four_cell 0.20567376
## 25 147 xl percent_four_cell 0.41463415
## 26 148 xl percent_four_cell 0.08139535
## 27 152 xl percent_four_cell 0.17299578
## 28 144 AMB percent_greater_than_four_cell 0.11764706
## 29 145 AMB percent_greater_than_four_cell 0.43601896
## 30 150 AMB percent_greater_than_four_cell 0.01687764
## 31 146 l percent_greater_than_four_cell 0.07258065
## 32 149 l percent_greater_than_four_cell 0.02197802
## 33 151 l percent_greater_than_four_cell 0.05319149
## 34 147 xl percent_greater_than_four_cell 0.15853659
## 35 148 xl percent_greater_than_four_cell 0.01162791
## 36 152 xl percent_greater_than_four_cell 0.04219409
Beta regression models are suggested for proportional data instead of t-tests and anovas because the data is often not normally distributed and homoscedastic. Here, we use a beta regression test (as described by S. S. Mangiafico in the RCompanion Handbook chapter, Beta Regression for Percent and Proportion Data).
model = betareg(proportion ~ cleavage_stage + treatment + cleavage_stage:treatment,
data = gathered_cleavage_counts) #Make the model
joint_tests(model) #do the test
## model term df1 df2 F.ratio p.value
## cleavage_stage 3 Inf 14.144 <.0001
## treatment 2 Inf 0.055 0.9468
## cleavage_stage:treatment 6 Inf 1.902 0.0765
summary(model) #examine the results
##
## Call:
## betareg(formula = proportion ~ cleavage_stage + treatment + cleavage_stage:treatment,
## data = gathered_cleavage_counts)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.8435 -0.8755 0.1297 0.6620 3.7332
##
## Coefficients (mean model with logit link):
## Estimate Std. Error
## (Intercept) 0.35537 0.36444
## cleavage_stagepercent_two_cell -2.23215 0.59653
## cleavage_stagepercent_four_cell -2.09342 0.58567
## cleavage_stagepercent_greater_than_four_cell -2.11844 0.58760
## treatmentl -0.68147 0.51506
## treatmentxl -0.78987 0.51718
## cleavage_stagepercent_two_cell:treatmentl 2.00324 0.79018
## cleavage_stagepercent_four_cell:treatmentl 1.09678 0.80330
## cleavage_stagepercent_greater_than_four_cell:treatmentl 0.06892 0.85542
## cleavage_stagepercent_two_cell:treatmentxl 1.95154 0.79456
## cleavage_stagepercent_four_cell:treatmentxl 1.27565 0.80205
## cleavage_stagepercent_greater_than_four_cell:treatmentxl 0.17514 0.85667
## z value Pr(>|z|)
## (Intercept) 0.975 0.329517
## cleavage_stagepercent_two_cell -3.742 0.000183 ***
## cleavage_stagepercent_four_cell -3.574 0.000351 ***
## cleavage_stagepercent_greater_than_four_cell -3.605 0.000312 ***
## treatmentl -1.323 0.185808
## treatmentxl -1.527 0.126691
## cleavage_stagepercent_two_cell:treatmentl 2.535 0.011239 *
## cleavage_stagepercent_four_cell:treatmentl 1.365 0.172144
## cleavage_stagepercent_greater_than_four_cell:treatmentl 0.081 0.935789
## cleavage_stagepercent_two_cell:treatmentxl 2.456 0.014044 *
## cleavage_stagepercent_four_cell:treatmentxl 1.590 0.111728
## cleavage_stagepercent_greater_than_four_cell:treatmentxl 0.204 0.838012
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 9.219 2.154 4.281 1.86e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 29.5 on 13 Df
## Pseudo R-squared: 0.5646
## Number of iterations: 20 (BFGS) + 2 (Fisher scoring)
plot(model) #Diagnostic plots
In summary, we have significant differences in the proportion of cells at different cleavage stages within a treatment, but there is not significant variation in the proportion of cells at different cleavage stages between treatments.
Calculate stats for cleavage stage and treatment categories
##Just treatment
gathered_cleavage_counts$proportion <- (gathered_cleavage_counts$proportion)*100 #data had to be proportions for the statistical test but we want them to be plotted as percentages. We do that here.
gathered_cleavage_counts <- rename(gathered_cleavage_counts, percent=proportion)
#Mean for all treatments should be 25 because 100/4=25. Standard dev may vary.
clvgstats <- select(gathered_cleavage_counts, treatment:percent)
clvgstats.amb <- clvgstats %>% filter(treatment=="AMB") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.l <- gathered_cleavage_counts %>% filter(treatment=="l") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.xl <- gathered_cleavage_counts %>% filter(treatment=="xl") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
treatment.clvgstats.rownames <- as.factor(c("AMB", "l", "xl"))
treatment.clvgstats <- as.data.frame(bind_rows(clvgstats.amb, clvgstats.l, clvgstats.xl, .id=NULL))
treatment.clvgstats$cleavage_stage <- treatment.clvgstats.rownames
treatment.clvgstats <- treatment.clvgstats[c(5,1,2,3,4)]
show(treatment.clvgstats)
## cleavage_stage mean sd min max
## 1 AMB 25 25.79005 1.687764 92.82700
## 2 l 25 15.77375 2.197802 47.25275
## 3 xl 25 17.13955 1.162791 56.97674
##Just cleavage
#Cleavage stage means should differ
clvgstats.onecell <- clvgstats %>% filter(cleavage_stage=="percent_one_cell") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell <- clvgstats %>% filter(cleavage_stage=="percent_two_cell") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell <- clvgstats %>% filter(cleavage_stage=="percent_four_cell") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
stage.clvgstats.rownames <- as.factor(c("One Cell", "Two Cells", "Four Cells", "> Four Cells"))
stage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell, clvgstats.twocell, clvgstats.fourcell, clvgstats.gfourcell, .id=NULL))
stage.clvgstats$cleavage_stage <- stage.clvgstats.rownames
stage.clvgstats <- stage.clvgstats[c(5,1,2,3,4)]
show(stage.clvgstats)
## cleavage_stage mean sd min max
## 1 One Cell 44.77419 21.48958 21.951220 92.82700
## 2 Two Cells 26.50791 12.63031 2.109705 40.65934
## 3 Four Cells 18.37731 11.23231 3.375527 41.46341
## 4 > Four Cells 10.34058 13.39930 1.162791 43.60190
##Treatment within cleavage
clvgstats.onecell.amb <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="AMB") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.l <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="l") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.onecell.xl <- clvgstats %>% filter(cleavage_stage=="percent_one_cell", treatment=="xl") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.amb <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="AMB") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.l <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="l") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.twocell.xl <- clvgstats %>% filter(cleavage_stage=="percent_two_cell", treatment=="xl") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="AMB") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="l") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.fourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_four_cell", treatment=="xl") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.amb <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="AMB") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.l <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="l") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
clvgstats.gfourcell.xl <- clvgstats %>% filter(cleavage_stage=="percent_greater_than_four_cell", treatment=="xl") %>%
summarise(mean=mean(percent), sd=sd(percent), min=min(percent), max=max(percent))
treatmentstage.clvgstats.rownames <- as.factor(c("OneCell_AMB", "OneCell_l", "OneCell_xl", "TwoCell_AMB", "TwoCell_l", "TwoCell_xl", "FourCell_AMB", "FourCell_l", "FourCell_xl", ">FourCell_AMB", ">FourCell_l", ">FourCell_xl"))
treatmentstage.clvgstats <- as.data.frame(bind_rows(clvgstats.onecell.amb, clvgstats.onecell.l, clvgstats.onecell.xl, clvgstats.twocell.amb, clvgstats.twocell.l, clvgstats.twocell.xl, clvgstats.fourcell.amb, clvgstats.fourcell.l, clvgstats.fourcell.xl, clvgstats.gfourcell.amb, clvgstats.gfourcell.l, clvgstats.gfourcell.xl, .id=NULL))
treatmentstage.clvgstats$cleavage_stage <- treatmentstage.clvgstats.rownames
treatmentstage.clvgstats <- treatmentstage.clvgstats[c(5,1,2,3,4)]
show(treatmentstage.clvgstats)
## cleavage_stage mean sd min max
## 1 OneCell_AMB 54.053570 35.792392 22.274882 92.827004
## 2 OneCell_l 41.160814 7.882617 32.258065 47.252747
## 3 OneCell_xl 39.108196 17.523601 21.951220 56.976744
## 4 TwoCell_AMB 12.986273 9.596819 2.109705 20.261438
## 5 TwoCell_l 35.025126 5.298807 30.141844 40.659341
## 6 TwoCell_xl 31.512342 9.863567 20.731707 40.084388
## 7 FourCell_AMB 13.942035 9.305565 3.375527 20.915033
## 8 FourCell_l 18.889054 8.288223 9.890110 26.209677
## 9 FourCell_xl 22.300843 17.215683 8.139535 41.463415
## 10 >FourCell_AMB 19.018122 21.878246 1.687764 43.601896
## 11 >FourCell_l 4.925005 2.553052 2.197802 7.258065
## 12 >FourCell_xl 7.078620 7.751562 1.162791 15.853659
Calculate means at each stage
mpc <- normalized_cleavage_counts %>%
group_by(treatment) %>%
summarize("One Cell" = mean(percent_one_cell),
"Two Cells" = mean(percent_two_cell),
"Four Cells" = mean(percent_four_cell),
"> Four Cells" = mean(percent_greater_than_four_cell)) %>%
gather(cleavage_stage, mean_percent_cleavage, -treatment, na.rm = TRUE, factor_key = TRUE)
show(mpc)
## # A tibble: 12 x 3
## treatment cleavage_stage mean_percent_cleavage
## <fct> <fct> <dbl>
## 1 AMB One Cell 0.541
## 2 l One Cell 0.412
## 3 xl One Cell 0.391
## 4 AMB Two Cells 0.130
## 5 l Two Cells 0.350
## 6 xl Two Cells 0.315
## 7 AMB Four Cells 0.139
## 8 l Four Cells 0.189
## 9 xl Four Cells 0.223
## 10 AMB > Four Cells 0.190
## 11 l > Four Cells 0.0493
## 12 xl > Four Cells 0.0708
Plot mean_percent_cleavage
final_cell_cleavage <- ggplot(mpc, aes(fill=cleavage_stage, y = mean_percent_cleavage, x=treatment)) +
geom_bar(position ="stack", stat = "identity") +
xlab("pH Treatment") + ylab("Percent per treatment") +
scale_fill_brewer(name = "Number of Cells", labels = c("One", "Two", "Four", "> Four"), palette = "Paired") + #colour modification
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background
ggtitle("(a)") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0),
legend.position = c(0.5, 1), #Place legend top middle of figure
legend.direction = "horizontal", #Make legend list items in a row versus columns
legend.background = element_rect(fill = NA), #Legend block no-fill
legend.title = element_text(size = 10),
legend.text = element_text(size = 8), legend.key.size = unit(0.5, "cm"),
legend.key.width = unit(0.5,"cm"))
final_cell_cleavage
Load libraries
library("tidyverse")
library("dplyr")
library("ggplot2")
library("gridExtra")
Import the data
raw_data <- read_csv("5-Developmental_progression_and_size/Input/size_metric_timeseries_data.csv")
raw_data$sample_ID <- as.factor(raw_data$sample_ID)
raw_data$time_point <- as.factor(raw_data$time_point)
raw_data$treatment <- as.factor(raw_data$treatment)
raw_data$TANK_NUM <- as.factor(raw_data$TANK_NUM)
raw_data$metric_id <- as.factor(raw_data$metric_id)
Check the type of each variable
str(raw_data)
## tibble [850 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ sample_ID : Factor w/ 34 levels "124","125","135",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ time_point: Factor w/ 7 levels "20190409_201",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ treatment : Factor w/ 4 levels "AMB","Gastrulation",..: NA NA NA NA NA NA NA NA NA NA ...
## $ TANK_NUM : Factor w/ 15 levels "1","2","3","4",..: NA NA NA NA NA NA NA NA NA NA ...
## $ metric_id : Factor w/ 850 levels "124_egg_1","124_egg_10",..: 1 12 19 20 21 22 23 24 25 2 ...
## $ width1 : num [1:850] 0.521 0.49 0.559 0.491 0.537 0.538 0.607 0.581 0.471 0.444 ...
## $ width2 : num [1:850] 0.404 0.424 0.361 0.337 0.44 0.402 0.511 0.469 0.545 0.474 ...
## - attr(*, "spec")=
## .. cols(
## .. sample_ID = col_double(),
## .. time_point = col_character(),
## .. treatment = col_character(),
## .. TANK_NUM = col_double(),
## .. metric_id = col_character(),
## .. width1 = col_double(),
## .. width2 = col_double()
## .. )
egg_volume <- filter(raw_data, time_point == "Eggs") %>%
mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(egg_volume)
## # A tibble: 6 x 8
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 124 Eggs <NA> <NA> 124_egg_1 0.521 0.404 0.0445
## 2 124 Eggs <NA> <NA> 124_egg_2 0.49 0.424 0.0461
## 3 124 Eggs <NA> <NA> 124_egg_3 0.559 0.361 0.0381
## 4 124 Eggs <NA> <NA> 124_egg_4 0.491 0.337 0.0292
## 5 124 Eggs <NA> <NA> 124_egg_5 0.537 0.44 0.0544
## 6 124 Eggs <NA> <NA> 124_egg_6 0.538 0.402 0.0455
Examine distribution of egg volume data
egg_volume_boxplot <- ggplot(egg_volume, aes(y = volume, x=treatment)) +
ylab(expression("Volume " (mm^2))) + xlab("Treatment") + # Set x and y axis labels
#expand_limits(y=c(0, 1.0)) +
geom_jitter(colour = "cadetblue3", alpha = 0.5) +
geom_boxplot(colour="cadetblue3", alpha=0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.title.x = element_blank(), # No axis title
axis.text.x = element_text(color = "White"), #White-out the x-axis label
axis.ticks.x = element_blank(), # No ticks on x-axis
plot.background=element_blank()) + #Set the plot background
ggtitle("(b)") + #add a main title
scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0),
legend.position = "bottom") #set title attributes
egg_volume_boxplot
#QQ-Plot
qqnorm(egg_volume$volume)
qqline(egg_volume$volume)
Examine egg volume for equal variance with residual regression.
#Create dataframe of predicted and residual values for each volume
egg_volume_res <- egg_volume #Copy dataset to manipulate for residual regression
egg_volume_fit <- lm(volume ~ sample_ID, data = egg_volume_res) # Fit the model
egg_volume_res$predicted <- predict(egg_volume_fit) # Save the predicted values
egg_volume_res$residuals <- residuals(egg_volume_fit) # Save the residual values
head(select(egg_volume_res, volume, predicted, residuals))
## # A tibble: 6 x 3
## volume predicted residuals
## <dbl> <dbl> <dbl>
## 1 0.0445 0.0495 -0.00494
## 2 0.0461 0.0495 -0.00334
## 3 0.0381 0.0495 -0.0113
## 4 0.0292 0.0495 -0.0203
## 5 0.0544 0.0495 0.00497
## 6 0.0455 0.0495 -0.00394
#Plot predicted (red) versus residuals (black)
egg_volume_res_scatter <- ggplot(egg_volume_res, aes(x = sample_ID, y = volume)) + # Plot volume versus treament as scatter
geom_point() +
geom_point(aes(y = predicted), color = "red") + # Add the predicted values
ylab("Volume mm2")+ xlab("Sample ID") + #Label axes
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
egg_volume_res_scatter
#Histogram of residuals
egg_volume_res_hist <- egg_volume_res %>%
ggplot(aes(x = residuals, fill = volume)) +
geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background)
egg_volume_res_hist
Examine summary statistics for untreated eggs
untreatedegg <- select(egg_volume, time_point, volume)
untreatedegg.stats <- untreatedegg %>%
summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
notreatment <- as.factor(c("NA"))
untreatedegg.stats$treatment <- notreatment
untreatedegg.stats.rownames <- as.factor(c("Untreated_Eggs"))
untreatedegg.stats$timepoint <-untreatedegg.stats.rownames
untreatedegg.stats <- untreatedegg.stats[c(6,5,1,2,3,4)]
show(untreatedegg.stats)
## # A tibble: 1 x 6
## timepoint treatment mean sd min max
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Untreated_Eggs NA 0.0396 0.0230 0.00150 0.0830
final_egg_volume <- egg_volume_boxplot
final_egg_volume
#Use the equation for volume of ellipse to calculate fertilized egg volume.
fert_volume <- filter(raw_data, time_point == "Fertilized_embryos") %>%
mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(fert_volume)
## # A tibble: 6 x 8
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 135 Fertilized_e… AMB 1 135_fertilzed… 0.456 0.464 0.0514
## 2 135 Fertilized_e… AMB 1 135_fertilzed… 0.424 0.436 0.0422
## 3 135 Fertilized_e… AMB 1 135_fertilzed… 0.435 0.466 0.0495
## 4 135 Fertilized_e… AMB 1 135_fertilzed… 0.42 0.464 0.0473
## 5 135 Fertilized_e… AMB 1 135_fertilzed… 0.449 0.473 0.0526
## 6 135 Fertilized_e… AMB 1 135_fertilzed… 0.465 0.469 0.0536
Examine treated eggs volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.
#Boxplot colored by treatment
fert_volume_t_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = treatment)) +
ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
geom_jitter(alpha = 0.5) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background
ggtitle("(c)") + #add a main title
scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0),
legend.position = "none") #set title attributes
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
fert_volume_t_boxplot
#Boxplot colored by TANK_NUM
fert_volume_s_boxplot <- ggplot(fert_volume, aes(x = treatment, y = volume, color = TANK_NUM)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
fert_volume_s_boxplot
#QQ-Plot
qqnorm(fert_volume$volume)
qqline(fert_volume$volume)
Examine treated eggs volume for equal variance with residual regression.
#Create dataframe of predicted and residual values for each volume
fert_volume_res <- fert_volume #Copy dataset to manipulate for residual regression
fert_volume_fit <- lm(volume ~ treatment, data = fert_volume_res) # Fit the model
fert_volume_res$predicted <- predict(fert_volume_fit) # Save the predicted values
fert_volume_res$residuals <- residuals(fert_volume_fit) # Save the residual values
head(select(fert_volume_res, treatment, volume, predicted, residuals))
## # A tibble: 6 x 4
## treatment volume predicted residuals
## <fct> <dbl> <dbl> <dbl>
## 1 AMB 0.0514 0.0483 0.00306
## 2 AMB 0.0422 0.0483 -0.00614
## 3 AMB 0.0495 0.0483 0.00111
## 4 AMB 0.0473 0.0483 -0.00100
## 5 AMB 0.0526 0.0483 0.00425
## 6 AMB 0.0536 0.0483 0.00521
#Plot predicted (red) versus residuals (black)
fert_volume_res_scatter <- ggplot(fert_volume_res, aes(x = treatment, y = volume)) + # Plot volume versus treament as scatter
geom_point() +
geom_point(aes(y = predicted), color = "red") + # Add the predicted values
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
fert_volume_res_scatter
#Histogram of residuals
fert_volume_res_hist <- fert_volume_res %>%
ggplot(aes(x = residuals, fill = volume)) +
geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background)
fert_volume_res_hist
#Histogram of residuals separated by treatment
fert_volume_res_t_hist <- fert_volume_res %>%
ggplot(aes(x = residuals, fill = treatment)) +
geom_histogram(binwidth = 0.0025, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
fert_volume_res_t_hist
Testing for tank effects: 1-Way ANOVA of treated eggs volume against tank nested within treatment.
#1-Way ANOVA of volume against tank nested within treatment.
fert_volume_nested_anova <- aov(volume ~ treatment/TANK_NUM, data = fert_volume)
summary(fert_volume_nested_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.000140 0.0000702 1.37 0.256
## treatment:TANK_NUM 6 0.003343 0.0005571 10.88 1.4e-10 ***
## Residuals 216 0.011058 0.0000512
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing the effects of treatment on volume:
#1-Way ANOVA of volume against treatment
fert_volume_anova <- aov(volume ~ treatment, data = fert_volume)
summary(fert_volume_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.00014 7.015e-05 1.082 0.341
## Residuals 222 0.01440 6.487e-05
Examine summary statistics for treated eggs
treatedegg <- select(fert_volume, time_point:treatment, volume)
treatedegg.amb <- treatedegg %>% filter(treatment=="AMB")
treatedegg.amb.stats <- treatedegg.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
amb <- as.factor(c("AMB"))
treatedegg.amb.stats$treatment <- amb
treatedegg.l <- treatedegg %>% filter(treatment=="l")
treatedegg.l.stats <- treatedegg.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
l <- as.factor(c("l"))
treatedegg.l.stats$treatment <- l
treatedegg.xl <- treatedegg %>% filter(treatment=="xl")
treatedegg.xl.stats <- treatedegg.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
xl <- as.factor(c("xl"))
treatedegg.xl.stats$treatment <- xl
treatedegg.stats.rownames <- as.factor(c("Treated_egg", "Treated_egg", "Treated_egg"))
treatedegg.stats <- as.data.frame(bind_rows(treatedegg.amb.stats, treatedegg.l.stats, treatedegg.xl.stats, .id=NULL))
treatedegg.stats$timepoint <-treatedegg.stats.rownames
treatedegg.stats <- treatedegg.stats[c(6,5,1,2,3,4)]
show(treatedegg.stats)
## timepoint treatment mean sd min max
## 1 Treated_egg AMB 0.04834626 0.008168517 0.02827965 0.07318527
## 2 Treated_egg l 0.04762602 0.006896299 0.03435164 0.07040271
## 3 Treated_egg xl 0.04954085 0.008961923 0.03259548 0.07502386
final_fert_volume <- fert_volume_t_boxplot +
annotate("text", x = 1.2, y = 0.068, label = "a") +
annotate("text", x = 2.2, y = 0.068, label = "a") +
annotate("text", x = 3.2, y = 0.068, label = "a")
final_fert_volume
gas_volume <- filter(raw_data, time_point == "Gastrulation") %>%
mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(gas_volume)
## # A tibble: 6 x 8
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 187 Gastrulation AMB 1 187_gastrulati… 0.614 0.456 0.0668
## 2 187 Gastrulation AMB 1 187_gastrulati… 0.571 0.492 0.0724
## 3 187 Gastrulation AMB 1 187_gastrulati… 0.581 0.489 0.0727
## 4 187 Gastrulation AMB 1 187_gastrulati… 0.572 0.382 0.0437
## 5 187 Gastrulation AMB 1 187_gastrulati… 0.701 0.37 0.0502
## 6 187 Gastrulation AMB 1 187_gastrulati… 0.551 0.511 0.0753
Examine gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.
#Boxplot colored by treatment
gas_volume_t_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = treatment)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(gas_volume_t_boxplot)
#Boxplot colored by TANK_NUM
gas_volume_s_boxplot <- ggplot(gas_volume, aes(x = treatment, y = volume, color = TANK_NUM)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)
#QQ-Plot
qqnorm(gas_volume$volume)
qqline(gas_volume$volume)
The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.
gas_volume_sqrt <- mutate(gas_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(gas_volume_sqrt)
## # A tibble: 6 x 9
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 187 Gastrulat… AMB 1 187_gast… 0.614 0.456 0.0668
## 2 187 Gastrulat… AMB 1 187_gast… 0.571 0.492 0.0724
## 3 187 Gastrulat… AMB 1 187_gast… 0.581 0.489 0.0727
## 4 187 Gastrulat… AMB 1 187_gast… 0.572 0.382 0.0437
## 5 187 Gastrulat… AMB 1 187_gast… 0.701 0.37 0.0502
## 6 187 Gastrulat… AMB 1 187_gast… 0.551 0.511 0.0753
## # … with 1 more variable: sqrt_volume <dbl>
Examine sqrt-transformed gastrula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.
#Boxplot colored by treatment
gas_sqrt_volume_t_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
geom_jitter(alpha = 0.5) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background
ggtitle("(d)") + #add a main title
scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0),
legend.position = "none") #set title attributes
show(gas_sqrt_volume_t_boxplot)
#Boxplot colored by TANK_NUM
gas_sqrt_volume_s_boxplot <- ggplot(gas_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(gas_volume_s_boxplot)
#QQ-Plot
qqnorm(gas_volume_sqrt$sqrt_volume)
qqline(gas_volume_sqrt$sqrt_volume)
Examine sqrt-transformed gastrulation volume for equal variance with residual regression.
#Create dataframe of predicted and residual values for each volume
gas_sqrt_volume_res <- gas_volume_sqrt #Copy dataset to manipulate for residual regression
gas_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = gas_sqrt_volume_res) # Fit the model
gas_sqrt_volume_res$predicted <- predict(gas_sqrt_volume_fit) # Save the predicted values
gas_sqrt_volume_res$residuals <- residuals(gas_sqrt_volume_fit) # Save the residual values
head(select(gas_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
## treatment sqrt_volume predicted residuals
## <fct> <dbl> <dbl> <dbl>
## 1 AMB 0.259 0.252 0.00680
## 2 AMB 0.269 0.252 0.0173
## 3 AMB 0.270 0.252 0.0180
## 4 AMB 0.209 0.252 -0.0427
## 5 AMB 0.224 0.252 -0.0276
## 6 AMB 0.274 0.252 0.0227
#Plot predicted (red) versus residuals (black)
gas_sqrt_volume_res_scatter <- ggplot(gas_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
geom_point() +
geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
gas_sqrt_volume_res_scatter
#Histogram of residuals
gas_sqrt_volume_res_hist <- gas_sqrt_volume_res %>%
ggplot(aes(x = residuals, fill = volume)) +
geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background)
gas_sqrt_volume_res_hist
#Histogram of residuals separated by treatment
gas_sqrt_volume_res_t_hist <- gas_sqrt_volume_res %>%
ggplot(aes(x = residuals, fill = treatment)) +
geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
gas_sqrt_volume_res_t_hist
Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against tank nested within treatment.
#1-Way ANOVA of volume against tank nested within treatment.
gas_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = gas_volume_sqrt)
summary(gas_sqrt_volume_ts_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.002446 0.0012231 8.490 0.000307 ***
## treatment:TANK_NUM 4 0.000510 0.0001275 0.885 0.474327
## Residuals 168 0.024204 0.0001441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing the effects of treatment on volume:
#1-Way ANOVA of volume against treatment.
gas_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = gas_volume_sqrt)
summary(gas_sqrt_volume_txs_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.01002 0.005012 8.535 0.000292 ***
## Residuals 172 0.10101 0.000587
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(gas_sqrt_volume_txs_anova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sqrt_volume ~ treatment, data = gas_volume_sqrt)
##
## $treatment
## diff lwr upr p adj
## l-AMB -0.017399962 -0.027859991 -0.006939933 0.0003564
## xl-AMB -0.012094266 -0.022554295 -0.001634237 0.0188790
## xl-l 0.005305696 -0.006152691 0.016764084 0.5186419
Examine summary statistics for gastrula
gas <- select(gas_volume, time_point:treatment, volume)
gas.amb <- gas %>% filter(treatment=="AMB")
gas.amb.stats <- gas.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.amb.stats$treatment <- amb
gas.l <- gas %>% filter(treatment=="l")
gas.l.stats <- gas.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.l.stats$treatment <- l
gas.xl <- gas %>% filter(treatment=="xl")
gas.xl.stats <- gas.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
gas.xl.stats$treatment <- xl
gas.stats.rownames <- as.factor(c("Gastrulation", "Gastrulation", "Gastrulation"))
gas.stats <- as.data.frame(bind_rows(gas.amb.stats, gas.l.stats, gas.xl.stats, .id=NULL))
gas.stats$timepoint <-gas.stats.rownames
gas.stats <- gas.stats[c(6,5,1,2,3,4)]
show(gas.stats)
## timepoint treatment mean sd min max
## 1 Gastrulation AMB 0.06404722 0.01338058 0.03962596 0.11147668
## 2 Gastrulation l 0.05556756 0.01218978 0.03560216 0.08503743
## 3 Gastrulation xl 0.05781054 0.00924022 0.03422066 0.07561363
final_gas_volume <- gas_sqrt_volume_t_boxplot +
annotate("text", x = 1.2, y = 0.1, label = "a") +
annotate("text", x = 2.2, y = 0.1, label = "b") +
annotate("text", x = 3.2, y = 0.1, label = "b")
final_gas_volume
planulae_volume <- filter(raw_data, time_point == "Planulae") %>%
mutate(volume = ((4*pi*width1/6)*(width2/2)^2)) #Use equation for volume of ellipse
head(planulae_volume)
## # A tibble: 6 x 8
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 362 Planulae AMB 10 362_planulae_1 0.851 0.441 0.0867
## 2 362 Planulae AMB 10 362_planulae_2 0.817 0.444 0.0843
## 3 362 Planulae AMB 10 362_planulae_3 0.684 0.388 0.0539
## 4 362 Planulae AMB 10 362_planulae_4 0.796 0.515 0.111
## 5 362 Planulae AMB 10 362_planulae_5 0.75 0.452 0.0802
## 6 362 Planulae AMB 10 362_planulae_6 0.892 0.405 0.0766
Examine planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.
#Boxplot colored by treatment
planulae_volume_t_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = treatment)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(planulae_volume_t_boxplot)
#Boxplot colored by TANK_NUM
planulae_volume_s_boxplot <- ggplot(planulae_volume, aes(x = treatment, y = volume, color = TANK_NUM)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)
#QQ-Plot
qqnorm(planulae_volume$volume)
qqline(planulae_volume$volume)
The qq-plot shows that the data does not look normal. We will transform the data below using the ‘SQRT’ function.
planulae_volume_sqrt <- mutate(planulae_volume, sqrt_volume = sqrt(volume)) # Transform the distribution
head(planulae_volume_sqrt)
## # A tibble: 6 x 9
## sample_ID time_point treatment TANK_NUM metric_id width1 width2 volume
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl>
## 1 362 Planulae AMB 10 362_plan… 0.851 0.441 0.0867
## 2 362 Planulae AMB 10 362_plan… 0.817 0.444 0.0843
## 3 362 Planulae AMB 10 362_plan… 0.684 0.388 0.0539
## 4 362 Planulae AMB 10 362_plan… 0.796 0.515 0.111
## 5 362 Planulae AMB 10 362_plan… 0.75 0.452 0.0802
## 6 362 Planulae AMB 10 362_plan… 0.892 0.405 0.0766
## # … with 1 more variable: sqrt_volume <dbl>
Examine sqrt-transformed planula volume for normal distribution using a boxplot with jitter and qq-plot. Examine variation between tanks using a boxplot with jitter.
#Boxplot colored by treatment
planulae_sqrt_volume_t_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = (sqrt_volume)^2, color = treatment)) + #Square the sqrt volume to make size visually comparable across life stages
ylab(expression("Volume " (mm^2))) + xlab("Treatment") + #Label axes
geom_jitter(alpha = 0.5) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background
ggtitle("(e)") + #add a main title
scale_color_manual(values=c(AMB="cadetblue3", l="palevioletred", xl="indianred3")) + #set treatment colors
theme(plot.title = element_text(face = 'bold',
size = 12,
hjust = 0),
legend.position = ("none")) #set title attributes
planulae_sqrt_volume_t_boxplot
#Boxplot colored by TANK_NUM
planulae_sqrt_volume_s_boxplot <- ggplot(planulae_volume_sqrt, aes(x = treatment, y = sqrt_volume, color = TANK_NUM)) +
ylab("Volume mm2")+ xlab("Treatment") + #Label axes
geom_jitter(aes(alpha = 0.3)) +
geom_boxplot(alpha = 0) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
show(planulae_volume_s_boxplot)
#QQ-Plot
qqnorm(planulae_volume_sqrt$sqrt_volume)
qqline(planulae_volume_sqrt$sqrt_volume)
Examine sqrt-transformed planulae volume for equal variance with residual regression.
#Create dataframe of predicted and residual values for each volume
planulae_sqrt_volume_res <- planulae_volume_sqrt #Copy dataset to manipulate for residual regression
planulae_sqrt_volume_fit <- lm(sqrt_volume ~ treatment, data = planulae_sqrt_volume_res) # Fit the model
planulae_sqrt_volume_res$predicted <- predict(planulae_sqrt_volume_fit) # Save the predicted values
planulae_sqrt_volume_res$residuals <- residuals(planulae_sqrt_volume_fit) # Save the residual values
head(select(planulae_sqrt_volume_res, treatment, sqrt_volume, predicted, residuals))
## # A tibble: 6 x 4
## treatment sqrt_volume predicted residuals
## <fct> <dbl> <dbl> <dbl>
## 1 AMB 0.294 0.242 0.0526
## 2 AMB 0.290 0.242 0.0486
## 3 AMB 0.232 0.242 -0.00960
## 4 AMB 0.332 0.242 0.0907
## 5 AMB 0.283 0.242 0.0415
## 6 AMB 0.277 0.242 0.0350
#Plot predicted (red) versus residuals (black)
planulae_sqrt_volume_res_scatter <- ggplot(planulae_sqrt_volume_res, aes(x = treatment, y = sqrt_volume)) + # Plot volume versus treament as scatter
geom_point() +
geom_point(aes(y = predicted), color = "red") + # Add the predicted values +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background
planulae_sqrt_volume_res_scatter
#Histogram of residuals
planulae_sqrt_volume_res_hist <- planulae_sqrt_volume_res %>%
ggplot(aes(x = residuals, fill = volume)) +
geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) #Set the plot background)
planulae_sqrt_volume_res_hist
#Histogram of residuals separated by treatment
planulae_sqrt_volume_res_t_hist <- planulae_sqrt_volume_res %>%
ggplot(aes(x = residuals, fill = treatment)) +
geom_histogram(binwidth = 0.01, color="#e9ecef", alpha=0.6, position = 'identity') +
labs(fill="") + # Plot volume versus treament as scatter
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
plot.background=element_blank()) + #Set the plot background)
facet_wrap(facets = vars(treatment)) #Split 1 graph into three each showing a different treatment
planulae_sqrt_volume_res_t_hist
Testing for tank effects: 1-Way ANOVA of sqrt-transformed planulae volume against treatment. 1-Way ANOVA of volume against tank nested within treatment.
#1-Way ANOVA of volume against tank nested within treatment.
planulae_sqrt_volume_ts_anova <- aov(volume ~ treatment/TANK_NUM, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_ts_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.00506 0.0025286 10.30 6.57e-05 ***
## treatment:TANK_NUM 3 0.00144 0.0004809 1.96 0.123
## Residuals 144 0.03534 0.0002454
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing the effects of treatment on volume:
#1-Way ANOVA of volume against treatment
planulae_sqrt_volume_txs_anova <- aov(sqrt_volume ~ treatment, data = planulae_volume_sqrt)
summary(planulae_sqrt_volume_txs_anova)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 2 0.02277 0.011387 9.716 0.000109 ***
## Residuals 147 0.17227 0.001172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(planulae_sqrt_volume_txs_anova)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = sqrt_volume ~ treatment, data = planulae_volume_sqrt)
##
## $treatment
## diff lwr upr p adj
## l-AMB -0.029414826 -0.045625704 -0.013203947 0.0000928
## xl-AMB -0.020562553 -0.036773432 -0.004351675 0.0087664
## xl-l 0.008852273 -0.007358606 0.025063151 0.4014180
Examine summary statistics for planula
plan <- select(planulae_volume, time_point:treatment, volume)
plan.amb <- plan %>% filter(treatment=="AMB")
plan.amb.stats <- plan.amb %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.amb.stats$treatment <- amb
plan.l <- plan %>% filter(treatment=="l")
plan.l.stats <- plan.l %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.l.stats$treatment <- l
plan.xl <- plan %>% filter(treatment=="xl")
plan.xl.stats <- plan.xl %>% summarise(mean=mean(volume), sd=sd(volume), min=min(volume), max=max(volume))
plan.xl.stats$treatment <- xl
plan.stats.rownames <- as.factor(c("Planulae", "Planulae", "Planulae"))
plan.stats <- as.data.frame(bind_rows(plan.amb.stats, plan.l.stats, plan.xl.stats, .id=NULL))
plan.stats$timepoint <-plan.stats.rownames
plan.stats <- plan.stats[c(6,5,1,2,3,4)]
show(plan.stats)
## timepoint treatment mean sd min max
## 1 Planulae AMB 0.05984008 0.01878055 0.02089216 0.11329448
## 2 Planulae l 0.04598178 0.01271031 0.02244043 0.07760985
## 3 Planulae xl 0.05013986 0.01537470 0.01709592 0.08994879
volume.stats <- as.data.frame(bind_rows(untreatedegg.stats, treatedegg.stats, gas.stats, plan.stats, .id=NULL))
volume.stats <- volume.stats[c(6,1,2,3,4,5)]
show(volume.stats)
## max timepoint treatment mean sd min
## 1 0.08299064 Untreated_Eggs NA 0.03958601 0.023022591 0.001504261
## 2 0.07318527 Treated_egg AMB 0.04834626 0.008168517 0.028279649
## 3 0.07040271 Treated_egg l 0.04762602 0.006896299 0.034351644
## 4 0.07502386 Treated_egg xl 0.04954085 0.008961923 0.032595475
## 5 0.11147668 Gastrulation AMB 0.06404722 0.013380581 0.039625961
## 6 0.08503743 Gastrulation l 0.05556756 0.012189782 0.035602165
## 7 0.07561363 Gastrulation xl 0.05781054 0.009240220 0.034220665
## 8 0.11329448 Planulae AMB 0.05984008 0.018780552 0.020892161
## 9 0.07760985 Planulae l 0.04598178 0.012710312 0.022440434
## 10 0.08994879 Planulae xl 0.05013986 0.015374697 0.017095919
final_plan_volume <- planulae_sqrt_volume_t_boxplot +
annotate("text", x = 1.2, y = 0.105, label = "a") +
annotate("text", x = 2.2, y = 0.105, label = "b") +
annotate("text", x = 3.2, y = 0.105, label = "b")
final_plan_volume
fig_2 <- grid.arrange(final_cell_cleavage, arrangeGrob(final_egg_volume, final_fert_volume, final_gas_volume, final_plan_volume, ncol = 2, nrow = 2), ncol = 2, nrow = 1, widths = c(3,7), clip = "off")
ggsave("5-Developmental_progression_and_size/Output/fig2_embryo_development.pdf", fig_2, width = 16, height = 8, units = c("in"))